if (!dir.exists("Spf66_supplementary_fits")) dir.create("Spf66_supplementary_fits")
if (!dir.exists("Spf66_supplementary_fits/Sporozoite_ratio")) dir.create("Spf66_supplementary_fits/Sporozoite_ratio")
if (!dir.exists("Spf66_supplementary_fits/Age_stratified_FOI")) dir.create("Spf66_supplementary_fits/Age_stratified_FOI")
RUN_ALL <- FALSE # flag whether or not to re-run fitting
# source code for model fitting
source("Model_calibration_code/clin_inf_likelihood_helper_func.R")
source("Model_calibration_code/Metropolis_Hastings_fit.R")
source("Model_calibration_code/simulate_clinical_recurrences.R")
# pre-processed data from SPf66 cohort for model fitting
patient_metadata <- read_rds("Spf66_data_cleaned/patient_metadata.rds")
inf_states_by_VIN <- read_rds("Spf66_data_processed/discretised_infection_matrix.rds")
SEASONALITY <- read_rds("Spf66_data_processed/seasonality_vector.rds") # inferred from Pf
N_COHORT <- nrow(patient_metadata)
keep_VIN <- as.character(patient_metadata$VIN)
# results for baseline model fitting
MCMC_results_main <- read_rds("Spf66_calibration/MCMC_results_main.rds")
# discretisation into uniform windows
START_DATE <- as.Date("1993-10-01")
END_DATE <- as.Date("1995-07-15")
STUDY_DURATION <- as.numeric(difftime(END_DATE, START_DATE, unit="days"))
TIME_STEP <- 10
N_OBS <- STUDY_DURATION%/%10
SHIFT_WINDOW <- 20 # estimate Pv FOI separately before and after this window
YEAR_IN_TIMESTEP <- 365%/%TIME_STEP
THRESHOLD_AGE <- 2*YEAR_IN_TIMESTEP # possible stratification in FORI above/below this age
# fixed covariates
PREL_BASELINE <- 0.4
N_AGES <- patient_metadata[names(inf_states_by_VIN), "AGE"]*YEAR_IN_TIMESTEP # in units of TIME_STEP
names(N_AGES) <- names(inf_states_by_VIN)
MIN_AGE <- min(N_AGES)
MAX_AGE <- max(N_AGES)
# number of chains and iterations for MCMC (nested parallelisation over chains)
N_CHAINS <- 4
N_ITER <- 100000
BURNIN_PROP <- 0.2
N_CORES_PER_CHAIN <- 8
# parameters for Metropolis-Hastings proposal
LAMBDA_PROP_SD <- 0.02/365
NU_PROP_SD <- 0.2
ETA_PROP_SD <- 1/2000
LOGIT_RHO_PROP_SD <- 0.05
LOG_GAMMA_PROP_SD <- 0.05
# hyperparameters for informative priors
LOG_GAMMA_PRIOR_SD <- 0.6
LOGIT_RHO_PRIOR_SD <- 0.7
# parameters for posterior predictive data
N_POSTERIOR_PREDICTIVE_DATASETS <- 2000
PROPHYLAXIS_WINDOW <- 1
BUNCHING_WINDOW <- 2
# observed data for posterior predictive checking
masking_matrix <- read_rds("Spf66_data_processed/discretised_masking_matrix.rds")
observed_incidence_by_indiv <- read_rds("Spf66_data_processed/incidence_by_individual.rds")
# formatting
POSTERIOR_COL <- "#cf7940"
OBSERVED_COL <- "#4878b8"
The ratio of hypnozoites to immediately-developing forms in the baseline model is informed by in vivo estimates for the Chesson strain, which may not be representative for Southeast Asian strains. We thus perform a sensitivity analysis over the sporozoite ratio.
PREL_VALS <- c(0.1, 0.25, 0.6, 0.75, 0.9)
prel_seeds <- c("11061897", "19121927", "22101900", "19121927", "03101924")
MCMC_results_prel <- list()
for (n in 1:length(PREL_VALS)) {
if (!file.exists(paste0("Spf66_supplementary_fits/Sporozoite_ratio/prel_sensitivity_analysis_", n, ".rds"))) {
set.seed(prel_seeds[n])
# rescale LAMBDA2 to be the mean FOI in the second stage of the study
MCMC_results <- Metropolis_Hastings(inf_states_by_VIN, N_AGES, PREL_VALS[n], N_OBS, TIME_STEP,
SEASONALITY, SHIFT_WINDOW, THRESHOLD_AGE, 1,
N_CHAIN, N_ITER, N_CORES_PER_CHAIN,
LAMBDA_PROP_SD, NU_PROP_SD, ETA_PROP_SD,
LOGIT_RHO_PROP_SD, LOG_GAMMA_PROP_SD,
LOG_GAMMA_PRIOR_SD, LOGIT_RHO_PRIOR_SD) %>%
mutate(LAMBDA2=LAMBDA2*mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]))
write_rds(bind_cols(MCMC_results, PREL=PREL_VALS[n]),
paste0("Spf66_supplementary_fits/Sporozoite_ratio/prel_sensitivity_analysis_", n, ".rds"))
MCMC_results_prel[[n]] <- bind_cols(MCMC_results, PREL=PREL_VALS[n])
} else {
MCMC_results_prel[[n]] <- read_rds(paste0("Spf66_supplementary_fits/Sporozoite_ratio/prel_sensitivity_analysis_", n, ".rds"))
}
}
MCMC_results_prel <- append(MCMC_results_prel, list(bind_cols(MCMC_results_main, PREL=PREL_BASELINE))) %>%
lapply(function(x) x %>% subset(ITER>BURNIN_PROP*max(x$ITER)))
The baseline model is predicated on the assumption that there is no age-stratification in the force of inoculation between birth and age 15. The assumption of homogenoeity between ages 2 and 15 is informed by the absence of clear age strucuture in the incidence of symptomatic falciparum malaria in the SPf66 trial. However, it is plausible that behavioural differences mitigated the risk of exposure between birth and age 2. We thus re-fit the model, assuming that the force of inoculation between birth and age 2 is dampened by a fixed factor U.
U_VALS <- c(0.1, 0.25, 0.5, 0.75)
age_foi_seeds <- c("07101907", "15101999", "23071906", "27021931")
MCMC_results_age_foi <- list()
for (n in 1:length(U_VALS)) {
if (RUN_ALL || !file.exists(paste0("Spf66_supplementary_fits/Age_stratified_FOI/age_stratified_foi_", n, ".rds"))) {
set.seed(age_foi_seeds[n])
# rescale LAMBDA2 to be the mean FOI in the second stage of the study
MCMC_results <- Metropolis_Hastings(inf_states_by_VIN, N_AGES, PREL_BASELINE, N_OBS, TIME_STEP,
SEASONALITY, SHIFT_WINDOW, THRESHOLD_AGE, U_VALS[n],
N_CHAIN, N_ITER, N_CORES_PER_CHAIN,
LAMBDA_PROP_SD, NU_PROP_SD, ETA_PROP_SD,
LOGIT_RHO_PROP_SD, LOG_GAMMA_PROP_SD,
LOG_GAMMA_PRIOR_SD, LOGIT_RHO_PRIOR_SD) %>%
mutate(LAMBDA2=LAMBDA2*mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]))
write_rds(bind_cols(MCMC_results, U=U_VALS[n]),
paste0("Spf66_supplementary_fits/Age_stratified_FOI/age_stratified_foi_", n, ".rds"))
MCMC_results_age_foi[[n]] <- bind_cols(MCMC_results, U=U_VALS[n])
} else {
MCMC_results_age_foi[[n]] <- read_rds(paste0("Spf66_supplementary_fits/Age_stratified_FOI/age_stratified_foi_", n, ".rds"))
}
}
MCMC_results_age_foi <- append(MCMC_results_age_foi, list(bind_cols(MCMC_results_main, U=1))) %>%
lapply(function(x) x %>% subset(ITER>BURNIN_PROP*max(x$ITER)))
We also assess whether the assumed model of age-stratification in the
force of inoculation can better recaptiulate non-monotonic age structure
in the incidence of symptomatic vivax malaria.
posterior_predictive_age_foi <- lapply(MCMC_results_age_foi, function(x) {
posterior_draws <- x[sample(1:nrow(x), N_POSTERIOR_PREDICTIVE_DATASETS),]
posterior_predictive_cohorts <-
lapply(1:N_POSTERIOR_PREDICTIVE_DATASETS, function(i) {
simulate_dataset(time_dependent_fori(posterior_draws[i, "LAMBDA1"],
posterior_draws[i, "LAMBDA2"]/mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]),
SHIFT_WINDOW, SEASONALITY),
TIME_STEP, N_AGES, N_OBS, posterior_draws[i, "NU"], 1,
posterior_draws[i, "ETA"], PREL_BASELINE, PROPHYLAXIS_WINDOW, BUNCHING_WINDOW,
calculate_pclin(N_AGES, posterior_draws[i, "LOGIT_RHO"],
posterior_draws[i, "LOG_GAMMA"], MIN_AGE, MAX_AGE),
masking_matrix, posterior_draws[i, "U"], MIN_AGE)})
return(list(sim_data=posterior_predictive_cohorts, U=posterior_draws[1, "U"]))})
We focus on the incidence rate, computed as the quotient of the total number of symptomatic vivax episodes and the cumulative duration of clinical follow-up (adjusted for left- and right-censoring, documented absences from the camp and post-exposure prophylaxis) for each age group.
simulated_incidence_rate <- lapply(posterior_predictive_age_foi, function(sim_cohorts) {
sapply(sim_cohorts[["sim_data"]], function(x) {
split(x, patient_metadata[rownames(x), "AGE"]) %>%
sapply(function(y) sum(y==1, na.rm = TRUE)/sum(!is.na(y))*365%/%TIME_STEP)}) %>%
apply(1, function(x) quantile(x, c(0.025, 0.5, 0.975))) %>%
t %>% as.data.frame %>%
mutate(AGE=as.numeric(rownames(.)), U=sim_cohorts[["U"]],
FORI_label=factor(paste0("FOI under age of 2\ndampened by factor u=", U),
levels=paste0("FOI under age of 2\ndampened by factor u=", c(U_VALS, 1))),
source="Posterior predictive")}) %>%
bind_rows()
N_BOOTSTRAP_REPLICATES <- 2000
observed_incidence_bootstrap <- lapply(1:N_BOOTSTRAP_REPLICATES, function(i) {
observed_incidence_by_indiv[sample(1:N_COHORT, N_COHORT, replace=T),]})
observed_incidence_rate <- lapply(observed_incidence_bootstrap, function(x) {
x %>% group_by(AGE) %>% summarise(mean_incidence=sum(N_Pv)/sum(Followup_Pv))}) %>%
bind_rows(.id="iter") %>%
group_by(AGE) %>%
summarise(`2.5%`=quantile(mean_incidence, 0.025),
`50%`=quantile(mean_incidence, 0.5),
`97.5%`=quantile(mean_incidence, 0.975)) %>%
mutate(source="Observed")